/*
 * Copyright (2021) The Delta Lake Project Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.delta.sharing.server.common.actions

import java.util.UUID

import com.fasterxml.jackson.annotation.JsonIgnore
import com.fasterxml.jackson.databind.annotation.JsonDeserialize
import org.apache.hadoop.fs.Path

import io.delta.sharing.server.DeltaSharingUnsupportedOperationException

/**
 * Information about a deletion vector attached to a file action.
 *
 * Note: This is a stripped down version from runtime providing the minimum
 * support for deserializing deletion vector descriptors from the delta log.
 */
case class DeletionVectorDescriptor(
    /**
     * Indicates how the DV is stored.
     * Should be a single letter (see [[pathOrInlineDv]] below.)
     */
    storageType: String,
    /**
     * Contains the actual data that allows accessing the DV.
     *
     * Three options are currently supported:
     * - `storageType="u"` format: `<random prefix - optional><base85 encoded uuid>`
     *                            The deletion vector is stored in a file with a path relative to
     *                            the data directory of this Delta Table, and the file name can be
     *                            reconstructed from the UUID.
     *                            The encoded UUID is always exactly 20 characters, so the length
     *                            of the random prefix is the number of characters exceeding 20.
     * - `storageType="i"` format: `<base85 encoded bytes>`
     *                            The deletion vector is stored inline in the log.
     * - `storageType="p"` format: `<absolute path>`
     *                             The DV is stored in a file with an absolute path given by this
     *                             url.
     */
    pathOrInlineDv: String,
    /**
     * Start of the data for this DV in number of bytes from the beginning of the file it is stored
     * in.
     *
     * Always None when storageType = "i".
     */
    @JsonDeserialize(contentAs = classOf[java.lang.Integer])
    offset: Option[Int] = None,
    /** Size of the serialized DV in bytes (raw data size, i.e. before base85 encoding). */
    sizeInBytes: Int,
    /** Number of rows the DV logically removes from the file. */
    cardinality: Long,
    /**
     * Transient property that is used to validate DV correctness.
     * It is not stored in the log.
     */
    @JsonDeserialize(contentAs = classOf[java.lang.Long])
    maxRowIndex: Option[Long] = None) {

  import DeletionVectorDescriptor._

  /** True for every storage type other than inline. */
  @JsonIgnore
  def isOnDisk: Boolean = !isInline

  /** True when `storageType` is the inline marker (`"i"`). */
  @JsonIgnore
  def isInline: Boolean = storageType == INLINE_DV_MARKER

  /** True when `storageType` is the UUID marker (`"u"`), i.e. a table-relative file. */
  @JsonIgnore
  def isRelative: Boolean = storageType == UUID_DV_MARKER

  /** True when `storageType` is the absolute-path marker (`"p"`). */
  @JsonIgnore
  def isAbsolute: Boolean = storageType == PATH_DV_MARKER

  /**
   * Resolves the file that holds this deletion vector.
   *
   * Only UUID-based (`storageType="u"`) descriptors can be resolved: the optional random prefix
   * becomes a sub-directory of `tableLocation` and the decoded UUID determines the file name.
   *
   * @param tableLocation directory against which the relative DV file is resolved
   * @return the path of the DV file underneath `tableLocation`
   * @throws IllegalArgumentException if the DV is inline, or if `pathOrInlineDv` is too short to
   *                                  contain an encoded UUID
   * @throws DeltaSharingUnsupportedOperationException if the DV uses an absolute path or an
   *                                                   unrecognized storage type
   */
  def absolutePath(tableLocation: Path): Path = {
    require(isOnDisk, "Can't get a path for an inline deletion vector")
    storageType match {
      case UUID_DV_MARKER =>
        // If the file was written with a random prefix, we have to extract that,
        // before decoding the UUID.
        val randomPrefixLength = pathOrInlineDv.length - Codec.Base85Codec.ENCODED_UUID_LENGTH
        // `splitAt` treats a negative index as 0, which would hand a truncated string to the
        // decoder. Reject such values up front with a message that names the offending input.
        require(
          randomPrefixLength >= 0,
          s"Deletion vector path '$pathOrInlineDv' is shorter than the " +
            s"${Codec.Base85Codec.ENCODED_UUID_LENGTH} characters of an encoded UUID")
        val (randomPrefix, encodedUuid) = pathOrInlineDv.splitAt(randomPrefixLength)
        val uuid = Codec.Base85Codec.decodeUUID(encodedUuid)
        assembleDeletionVectorPath(tableLocation, uuid, randomPrefix)
      case PATH_DV_MARKER =>
        throw new DeltaSharingUnsupportedOperationException(
          "Table contains absolute paths and cannot be shared through delta sharing")
      case _ =>
        // Any storage type other than "i", "u" or "p" ends up here.
        throw new DeltaSharingUnsupportedOperationException(
          s"DELTA_CANNOT_RECONSTRUCT_PATH_FROM_URI_$pathOrInlineDv")
    }
  }
}

object DeletionVectorDescriptor {
  /** Fragment that appears in every file name generated by the deletion vector store. */
  val DELETION_VECTOR_FILE_NAME_CORE = "deletion_vector"

  // Single-letter `storageType` values that tell the kinds of DV storage apart.
  final val PATH_DV_MARKER: String = "p"
  final val INLINE_DV_MARKER: String = "i"
  final val UUID_DV_MARKER: String = "u"

  /**
   * Builds a new descriptor from `dv`, carrying over `storageType`, `pathOrInlineDv`, `offset`,
   * `sizeInBytes` and `cardinality`, with `maxRowIndex` reset to `None`.
   *
   * A `null` argument is passed through as `null`.
   */
  def apply(dv: DeletionVectorDescriptor): DeletionVectorDescriptor = dv match {
    case null => null
    case source => source.copy(maxRowIndex = None)
  }

  /**
   * Return the unique path under `targetParentPath` that is based on `id`.
   *
   * A non-empty `prefix` is inserted as a sub-directory between `targetParentPath` and the file.
   */
  def assembleDeletionVectorPath(targetParentPath: Path, id: UUID, prefix: String = ""): Path = {
    val fileName = s"${DELETION_VECTOR_FILE_NAME_CORE}_${id}.bin"
    // Pick the directory first so that the file path is built in a single place.
    val directory = if (prefix.nonEmpty) new Path(targetParentPath, prefix) else targetParentPath
    new Path(directory, fileName)
  }
}
